#!/bin/bash
# Environment bootstrap for running vllm-ascend tests on an Ascend NPU host.
# Assumes the Ascend CANN toolkit is installed under /usr/local/Ascend and the
# development checkouts live under /home/ma-user/work/users/wty — host-specific.

# Load the Ascend CANN toolkit environment (compilers, runtime libs, env vars).
source /usr/local/Ascend/ascend-toolkit/set_env.sh
# source /usr/local/Ascend/nnal/atb/set_env.sh
# source /mnt/sfs/qzh/codes/MindIE-LLM/examples/atb_models/output/atb_models/set_env.sh
# source /usr/local/Ascend/mindie/set_env.sh
# export PYTHONPATH=/home/ma-user/work/users/wty/vllm:$PYTHONPATH
# Put local checkouts of vllm-ascend, lcoc and vllm 0.7.1 ahead of any installed copies.
export PYTHONPATH=/home/ma-user/work/users/wty/vllm-ascend:/home/ma-user/work/users/wty/lcoc:/home/ma-user/work/users/wty/vllm_071:$PYTHONPATH
# Expose local benchmark scripts and what is presumably a custom Valgrind build
# ("valgrid_build" looks like a typo of "valgrind_build" — confirm the path exists).
export PATH=/home/ma-user/work/users/wty/vllm_benchmark:/home/ma-user/work/wty/valgrid_build/bin:$PATH
# Valgrind's private tool libraries live under libexec/valgrind in that build.
export LD_LIBRARY_PATH=/home/ma-user/work/users/wty/valgrid_build/libexec/valgrind:$LD_LIBRARY_PATH
# export PYTHONPATH=/mnt/sfs/wty/workspace/Vllm-MindIE/vllm:/mnt/sfs/wty/workspace/FastChat:$PYTHONPATH
# export ASCEND_SLOG_PRINT_TO_STDOUT=1
# export ASCEND_GLOBAL_LOG_LEVEL=1

# Determinism flags so test outputs are bit-reproducible across runs.
# NOTE(review): the semantics of the LCCL/HCCL/ATB/TASK_QUEUE flags below are
# assumed from their names — confirm against the Ascend CANN / ATB documentation.
export LCCL_DETERMINISTIC=1
export HCCL_DETERMINISTIC=true
export ATB_MATMUL_SHUFFLE_K_ENABLE=false
export ATB_LLM_LCOC_ENABLE=false
export TASK_QUEUE_ENABLE=2

# 1.8e9 ms (~20 days) — effectively disables the vLLM engine RPC timeout so
# long debug sessions don't kill the worker.
export VLLM_RPC_GET_DATA_TIMEOUT_MS=1800000000
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
# torch.compile debug artifacts on, and TorchDynamo tracing globally disabled
# (eager execution only).
export TORCH_COMPILE_DEBUG=1
export TORCHDYNAMO_DISABLE=1
# export INFER_MODE=DEFAULT
# export VLLM_USE_V1=1
# print_env NAME... — print each named environment variable as "NAME=value"
# (one per line), using bash indirect expansion instead of seven copy-pasted
# echo lines. Unset variables print as "NAME=" just like the original echoes.
print_env() {
  local var
  for var in "$@"; do
    printf '%s=%s\n' "$var" "${!var}"
  done
}

echo "--------------------ENV VARIABLES----------------------"
print_env \
  LCCL_DETERMINISTIC \
  HCCL_DETERMINISTIC \
  ATB_MATMUL_SHUFFLE_K_ENABLE \
  ATB_LLM_LCOC_ENABLE \
  TASK_QUEUE_ENABLE \
  VLLM_RPC_GET_DATA_TIMEOUT_MS \
  VLLM_ALLOW_LONG_MAX_MODEL_LEN
echo "-------------------------------------------------------"
# Sanity check: the vllm_ascend plugin must resolve via the PYTHONPATH set above.
python -c "import vllm_ascend; print(vllm_ascend.__path__)"

# Run the offline-inference suite; all stdout/stderr is captured to a log file.
# Guard the cd (ShellCheck SC2164): without it, a missing tests/ directory would
# silently run pytest from the wrong working directory.
cd tests || { echo "ERROR: 'tests' directory not found — run from the repo root" >&2; exit 1; }
# pytest -s multi_step/test_correctness_llm.py::test_multi_step_llm &> test_log.log
pytest -s test_offline_inference.py &> test_offline_infer_log.log